//// Marketing Taxation
//// Simon Hoellerbauer
//// Lucy Martin, Brigitte Seim, Luis Camacho

// NOT RUN WITH NEW FILE PATHS

/*
THIS DO FILE CLEANS & RESHAPES THE BASELINE DATA FOR ANALYSIS.

IF NECESSARY REPLACE FILE PATH IN USE STATEMENT WITH FULL FILE PATH FOR
marketvendor_BASELINEFINAL_noID_raw.dta IN data/1_raw/ FOLDER

STATA DO FILE WILL NOT FOLLOW R PROJECT PATH COMPLETION; MAKE SURE TO SPECIFY
THE FULL PATH.

THERE IS A SAVE STATEMENT AT THE END OF THIS DO FILE; THE OUTPUT IS SAVED
IN THE data/1_raw/ FOLDER BECAUSE IT WILL BE FURTHER PROCESSED BY THE R SCRIPT
cleaning_baseline.R

TO DO:
*/
clear all
set more off

* Set paths & Globals
* The path below is relative, so Stata's working directory must be the project
* root; otherwise replace it with the full path to the raw baseline file.
* Forward slashes are used because Stata resolves them on Windows, macOS and
* Unix alike, whereas backslashes only work on Windows.

	use "data/1_raw/marketvendor_BASELINEFINAL_noID_raw.dta"

	* Graph scheme used for any plots produced in this session
	set scheme s1mono

****CLEANING VARIABLES****

/// GENERAL INFO VARIABLES
**encode the string survey type as a labeled integer so that it is easier to use later
	encode surveytype, gen(surveytype2)
	label variable surveytype2 "1=long, 2=short"

/// DEMOGRAPHIC VARIABLES
**This section cleans the major demographic variables

**Empty placeholder variable whose dashed label marks the start of the demographic block
	generate Demographics = .
	label variable Demographics "--------------------"

//r1 (gender variables)
	**male is the mirror image of r1 (r1: 0 = male, 1 = female); any other r1 value stays missing
	generate male = 1 - r1 if inlist(r1, 0, 1)
	label variable male "Dummy. 1 if male, 0 if female"
	label define male 0 "Female" 1 "Male"
	label values male male

	**female is a straight copy of r1
	clonevar female = r1
	label variable female "Dummy. 1 if female, 0 if male"

//d1 (Home District) - Long Survey
**copy d1, leaving the string code -77 empty
	generate home_dist = d1 if d1 != "-77"
	label variable home_dist "Respondent's birth district (String)"

//d2 (Current District) - Long Survey
**same treatment for d2
	generate curr_dist = d2 if d2 != "-77"
	label variable curr_dist "District in which respondent lives (String)"

//d5 (Tribe) - Long Survey
**keep only positive d5 codes and reuse the d5 value label
	generate tribe = d5 if d5 > 0
	label variable tribe "Respondent's tribe (Labeled integer)"
	label values tribe d5

//d8 (AGE)
**Clean d8: negative values are treated as non-response codes and set to missing
	gen age = d8
	replace age = . if d8 < 0
	label var age "Respondent's age (Integer)"

**age2 and age3 start as copies of age. Respondents who only gave an age range
**(d9: 1 = 18-29, 2 = 30-39, 3 = 40-49, 4 = 50 and over) then receive the
**sample mean (age2) or sample median (age3) of the exact ages reported within
**that range. One summarize per range supplies both r(mean) and r(p50).
	clonevar age2 = age
	clonevar age3 = age

	quietly summarize age if inrange(age, 18, 29), detail
	replace age2 = r(mean) if d9 == 1
	replace age3 = r(p50) if d9 == 1

	quietly summarize age if inrange(age, 30, 39), detail
	replace age2 = r(mean) if d9 == 2
	replace age3 = r(p50) if d9 == 2

	quietly summarize age if inrange(age, 40, 49), detail
	replace age2 = r(mean) if d9 == 3
	replace age3 = r(p50) if d9 == 3

	**summarize skips missing ages, so no explicit missing check is needed here
	quietly summarize age if age >= 50, detail
	replace age2 = r(mean) if d9 == 4
	replace age3 = r(p50) if d9 == 4

	label var age2 "Age, combined with age range, using sample mean of age range (d9) (Integer)"
	**age3 holds medians, so its label says median rather than mean
	label var age3 "Age, combined with age range, using sample median of age range (d9) (Integer)"


	/* unnecessary
	gen age_range = d9
	replace age_range = 1 if age >= 18 & age <= 29
	replace age_range = 2 if age >= 30 & age <= 39
	replace age_range = 3 if age >= 40 & age <= 49
	replace age_range = 4 if age >= 50

	*Creating new value labels for this variable
	label define age_range 1 "18-29" 2 "30-39" 3 "40-49" 4 "Over 50"
	label values age_range age_range
	label var age_range "Respondent's age (1:18-29, 2:30-39, 3:40-49, 4:Over 50, Combined with d9)
	*/

//d12 (Education)
**Shift every non-negative d12 code up by one, then move the no-education code (888 in d12) to 0
	generate education = d12 + 1 if d12 >= 0
	replace education = 0 if d12 == 888

**The value label is built in pieces for readability; codes 14 and 15 have no label text
	label define education 0 "None" 1 "Nursery School"
	label define education 2 "Standard 1" 3 "Standard 2" 4 "Standard 3" 5 "Standard 4", add
	label define education 6 "Standard 5" 7 "Standard 6" 8 "Standard 7" 9 "Standard 8", add
	label define education 10 "Form 1" 11 "JCE/Form 2" 12 "Form 3" 13 "MSCE/Form 4", add
	label define education 16 "Technical/Private College (non-Degree)" 17 "Degree" 18 "Masters" 19 "PhD", add
	label values education education
	label var education "Education, with no education (None) as 0, not 888"
	notes education: In the original d12 variable, 888 indicates no education, which makes little sense, so this variable has all the underlying values shifted up one and then changes no education to be 0.

//d13 (Language read) - long survey
**d13 records the language of the card the respondent chose to read; copied under a clearer name
	generate reading_language = d13
	label variable reading_language "Reading language (Labeled integer, 5=could not read)"
	label values reading_language d13
	notes reading_language: This variable indicates the language in which respondents elected to read a card (used to check literacy). 1=Chichewa, 2=English, 3=Tumbuka, 4=Yawo, 5=Could not read

//d14 (Literacy, Coder Evaluation) - long survey
**reverse d14 so that 1 = could not read and 4 = read with ease; non-positive codes stay missing
	generate literacy = abs(d14 - 5) if d14 > 0
	label define literacy 1 "Could not read" 2 "Could read some of the card" 3 "Could read the whole card with difficulty" 4 "Could read the whole card with ease"
	label values literacy literacy
	label variable literacy "How respondent read card: 1=could not read, 4=easily"

**literacy_high: 1 only for the top category, 0 for every other non-missing value
	generate literacy_high = (literacy == 4) if literacy != .
	label variable literacy_high "Dummy, indicates ability to read card with ease"

**literacy_any: 0 below category 3, 1 for categories 3 and 4
	generate literacy_any = 0 if literacy < 3
	replace literacy_any = 1 if inlist(literacy, 3, 4)
	label variable literacy_any "Dummy, indicates ability to read card in full"

//d6 (marital status) - long survey
	**copy d6 (with its labels) under a recognizable name, keeping positive codes only
	clonevar marital_status = d6 if d6 > 0

	**married = 1 for marital_status codes 2 and 3, 0 for any other non-missing code
	generate married = inlist(marital_status, 2, 3) if marital_status != .
	label define married 0 "Not Married" 1 "Married"
	label values married married
	label variable married "Dummy, 1 = Married, 0 = Not Married"

/// ECONOMIC VARIABLES
**Empty placeholder variable whose dashed label marks the start of the economic block
	generate Economics = .
	label variable Economics "--------------------"

//e1 (Income)
**keep strictly positive income reports only
	generate hh_income = e1 if e1 > 0
	label variable hh_income "Estimated monthly household income (integer)"

**top-coded version: values at or above the 99th percentile are set to the 99th percentile
	quietly summarize hh_income, detail
	generate hh_income_top = cond(hh_income >= r(p99) & hh_income != ., r(p99), hh_income)
	label variable hh_income_top "Estimated monthly household income (integer, top-coded 99%)"

**trimmed version: values above the 99th percentile or below 100 are dropped to missing
	quietly summarize hh_income, detail
	generate hh_income_trim = hh_income if hh_income >= 100 & hh_income <= r(p99)
	label variable hh_income_trim "Estimated monthly household income (integer, trimmed 99% and < 100)"

//e3 (Selling in Market)
**reverse e3 so that larger numbers mean selling more often; non-positive codes stay missing
	generate sell_freq = abs(e3 - 8) if e3 > 0
	label define sell_freq 1 "First time" 2 "Once a year" 3 "Once every few months" 4 "Once a month" 5 "A few times a month (2-4 times)" 6 "Every week (1-6 days per week)" 7 "Every day"
	label values sell_freq sell_freq
	label variable sell_freq "How often sell in market (labeled integer, 1=first time, 7=every day)"

**regular seller: sells weekly or more often (categories 6 and 7)
	generate sell_regular = (sell_freq >= 6) if sell_freq != .
	label variable sell_regular "Dummy, indicates if sells in market weekly or more often or not"

**daily seller: category 7 only
	generate sell_daily = (sell_freq == 7) if sell_freq <= 7
	label variable sell_daily "Dummy, indicates if sells daily or not"

//e5 (sell throughout year, or only at certain times) - long survey
	**dummy from e5: code 1 becomes 1, code 2 becomes 0, everything else stays missing
	generate sell_yr_round = (e5 == 1) if inlist(e5, 1, 2)
	label define sell_yr_round 0 "Only at certain times" 1 "Throughout the year"
	label values sell_yr_round sell_yr_round
	label variable sell_yr_round "Dummy, indicates sells at market throughout year vs only at certain times"

//e6 (what affects selling in market) - long survey
	**two reasons for not selling year-round, copied unchanged from the e6 items:
	**e6_1 = rainy season, e6_2 = sells only when crops are available
	generate vendor_aff_rain = e6_1
	label variable vendor_aff_rain "Dummy, indicates vendor does not sell year-round because of rainy season"

	generate vendor_aff_crops = e6_2
	label variable vendor_aff_crops "Dummy, indicates vendor does not sell year-round because of crop availability"

//e7_a (service or good sold)
**stall sells goods: e7_a_1 with negative codes left missing
	generate good = e7_a_1 if e7_a_1 >= 0
	label variable good "Dummy, indicates sells goods in stall"

**stall offers a service: e7_a_2 with negative codes left missing
	generate service = e7_a_2 if e7_a_2 >= 0
	label variable service "Dummy, indicates offers service in stall"

//e7_b (type of service or good sold, enumerator evaluation)
**only positive e7_b codes are kept, which removes the other category;
**most of e7_b_other could be hand coded to fit into these categories
	generate stall_type = e7_b if e7_b > 0
	label values stall_type e7_b
	label variable stall_type "Different categories of stall types (labeled integer)"

//e10_primary (whether vendor is primary earner) - long survey
	**copy of e10_primary under a clearer name, negative codes left missing
	clonevar prim_earner = e10_primary if e10_primary >= 0
	label variable prim_earner "Dummy, indicates vendor is primary earner in hh"

//e10_years - long survey
	generate yrs_in_mkt = e10_years if e10_years >= 0
	label variable yrs_in_mkt "Years vendors has been selling in market (integer)"
	**Open question from the original author: there are decimal values for this variable?
	**There are decimal values for this variable?

//e10 (profit)
**keep non-negative profit reports; negative codes are left missing
	generate profit = e10 if e10 >= 0
	label variable profit "Average profit (integer). Note values not all for same time span"

**turn the string unit e10_unit into a labeled integer, numbered in the order listed below
	generate profit_unit = .
	local code = 0
	foreach unit in Daily Weekly BiWeekly Monthly Annually {
		local ++code
		replace profit_unit = `code' if e10_unit == "`unit'"
	}
	label define profit_unit 1 "Daily" 2 "Weekly" 3 "Biweekly" 4 "Monthly" 5 "Annually"
	label values profit_unit profit_unit
	label variable profit_unit "To what timeframe profit ammount given belongs"

**top-coded version: values at or above the 99th percentile are set to the 99th percentile
	quietly summarize profit, detail
	generate profit_top = cond(profit >= r(p99) & profit != ., r(p99), profit)
	label variable profit_top "Average profit (integer top-coded 99%). Note values not all for same time span"

**trimmed version: values above the 99th percentile or below 100 are dropped to missing
	quietly summarize profit, detail
	generate profit_trim = profit if profit >= 100 & profit <= r(p99)
	label variable profit_trim "Average profit (integer, trimmed 99% and < 100). Note values not all for same time span"

**make profit_monthly which has only monthly profits where we are sure of how often they sell
**Codes used below (defined earlier in this file):
**  sell_freq:   1 first time, 2 once a year, 3 once every few months, 4 once a month,
**               5 a few times a month, 6 every week, 7 every day
**  profit_unit: 1 daily, 2 weekly, 3 biweekly, 4 monthly, 5 annually
**  surveytype2: 1 long, 2 short
**The replace statements run in order, so a later rule overrides an earlier one
**whenever both conditions hold for the same observation.
	**First make e4_count into an integer variable, and remove impossible values
	**real() converts the string e4_count to a number; the value "8" is excluded
	**(presumably days sold per week, so 8 is impossible - confirm against questionnaire)
	gen days_sell = real(e4_count) if e4_count != "8"

	**Reported profit is taken as already monthly for yearly or monthly sellers
	**(any unit) and for sell_freq 5-7 when the stated unit is monthly.
	**NOTE(review): sell_freq == 2 is labeled once a year, yet profit is used
	**unscaled as a monthly figure - confirm this is intended.
	gen profit_monthly = profit if (sell_freq == 2) | (sell_freq == 4) | ((sell_freq == 6 | sell_freq == 5 | sell_freq == 7) & profit_unit == 4)
	**weekly unit: 52 weeks spread over 12 months
	replace profit_monthly = profit * 52/12 if (sell_freq == 6 & profit_unit == 2) | (sell_freq == 7 & profit_unit == 2)
	**biweekly unit: 26 two-week periods spread over 12 months
	replace profit_monthly = profit * 26/12 if (sell_freq == 6 & profit_unit == 3) | (sell_freq == 7 & profit_unit == 3)
	**annual unit: divide by 12
	replace profit_monthly = profit / 12 if (sell_freq == 6 & profit_unit == 5) | (sell_freq == 7 & profit_unit == 5)
	**daily unit, short survey, sells every day: 365 days spread over 12 months
	replace profit_monthly = profit * 365/12 if (surveytype2 == 2 & (sell_freq == 7 & profit_unit == 1))
	**daily unit, long survey: scale by the reported days_sell, then weeks to months
	replace profit_monthly = profit * days_sell * 52/12 if (surveytype2 == 1 & ((sell_freq == 6 & profit_unit == 1) | (sell_freq == 7 & profit_unit == 1)))
	label variable profit_monthly "Average monthly profit (comparable monthly values only)"
	notes profit_monthly: This variable is an adjusted version of profit. The goal was to make the values of this revenue variable roughly comparable at the monthly level, as different respondents gave different unit for their revenue. That is to say, some gave revenue in daily terms, some weekly, etc. This variable includes only values of which we can be reasonably certain.

**make profit_monthly_full which is our best estimate for as many obs as possible. Is messier.
	**starts from profit_monthly and fills in the cases left missing above
	gen profit_monthly_full = profit_monthly
	**NOTE(review): first-time sellers keep profit unscaled here, while
	**profit_daily_full below divides the same cases by 365 - the two treatments
	**imply different time spans; confirm which is intended.
	replace profit_monthly_full = profit if sell_freq == 1
	replace profit_monthly_full = profit * 4/12 if (sell_freq == 3)
	replace profit_monthly_full = profit * 4 if (sell_freq == 5 & (profit_unit == 1 | profit_unit == 2))
	replace profit_monthly_full = profit * 2 if (sell_freq == 5 & profit_unit == 3)
	**short survey weekly sellers with a daily unit: no days_sell is used for them,
	**so the mean days_sell of weekly sellers reporting a daily unit stands in
	quietly sum days_sell if sell_freq == 6 & profit_unit == 1
	replace profit_monthly_full = profit * r(mean) * 52/12 if (surveytype2 == 2 & (sell_freq == 6 & profit_unit == 1))
	label var profit_monthly_full "Average monthly profit (best guess monthly ave)"

**make profit_daily, same idea as profit_monthly but for daily profit
	**same case structure as profit_monthly, with every factor expressed per 365 days
	gen profit_daily = profit * 12/365 if (sell_freq == 2) | (sell_freq == 4) | ((sell_freq == 6 | sell_freq == 5 | sell_freq == 7) & profit_unit == 4)
	replace profit_daily = profit * 52/365 if (sell_freq == 6 & profit_unit == 2) | (sell_freq == 7 & profit_unit == 2)
	replace profit_daily = profit * 26/365 if (sell_freq == 6 & profit_unit == 3) | (sell_freq == 7 & profit_unit == 3)
	replace profit_daily = profit / 365 if (sell_freq == 6 & profit_unit == 5) | (sell_freq == 7 & profit_unit == 5)
	**daily unit, short survey, sells every day: already a daily figure
	replace profit_daily = profit if (surveytype2 == 2 & (sell_freq == 7 & profit_unit == 1))
	replace profit_daily = profit * days_sell * 52/365 if (surveytype2 == 1 & ((sell_freq == 6 & profit_unit == 1) | (sell_freq == 7 & profit_unit == 1)))
	label variable profit_daily "Average daily profit (comparable daily values only)"
	notes profit_daily: This variable is an adjusted version of profit. The goal was to make the values of this revenue variable roughly comparable at the daily level, as different respondents gave different unit for their revenue. That is to say, some gave revenue in daily terms, some weekly, etc. This variable includes only values of which we can be reasonably certain.

**make profit_daily_full which is our best estimate for as many obs as possible. Is messier..
	**starts from profit_daily and fills in the cases left missing above
	gen profit_daily_full = profit_daily
	**NOTE(review): see the note on sell_freq == 1 in profit_monthly_full
	replace profit_daily_full = profit / 365 if sell_freq == 1
	replace profit_daily_full = profit * 4/365 if (sell_freq == 3)
	replace profit_daily_full = profit * 4 * 12/365 if (sell_freq == 5 & (profit_unit == 1 | profit_unit == 2))
	replace profit_daily_full = profit * 2 * 12/365 if (sell_freq == 5 & profit_unit == 3)
	quietly sum days_sell if sell_freq == 6 & profit_unit == 1
	replace profit_daily_full = profit * r(mean) * 52/365 if (surveytype2 == 2 & (sell_freq == 6 & profit_unit == 1))
	label var profit_daily_full "Average daily profit (best guess daily ave)"

****balance check: compare each profit measure between the two survey types
	foreach measure of varlist profit_monthly profit_monthly_full profit_daily profit_daily_full {
		ttest `measure', by(surveytype2)
	}

****stall types among observations above the 99th percentile of profit_monthly
	quietly summarize profit_monthly, detail
	tabulate stall_type profit_monthly if profit_monthly > r(p99)
	**recorded result: retail-groceries is modal category, with 26 of the top 1% earners (37%)
	**but highest value is retail-clothes, shoes

****same tabulation for the top 1% of profit_monthly_full
	quietly summarize profit_monthly_full, detail
	tabulate stall_type profit_monthly_full if profit_monthly_full > r(p99)
	**recorded result: retail-groceries is modal category with 34 of top 1% earners (29.8%)

//e15 (how do economic conditions last year compare to this year) - long survey
	**reverse the 5-point scale so that higher values mean better conditions
	generate econ_cond_5 = abs(e15 - 6) if e15 > 0
	label define econ_cond_5 1 "Much Worse" 2 "Worse" 3 "Same" 4 "Better" 5 "Much Better"
	label values econ_cond_5 econ_cond_5
	label variable econ_cond_5 "How are econ conditions this month this year compared to last year, 5 options"

	**3-point version: 1-2 become 1 (worse), 3 becomes 2 (same), 4-5 become 3 (better)
	generate econ_cond_3 = cond(inlist(econ_cond_5, 1, 2), 1, cond(econ_cond_5 == 3, 2, 3)) if inlist(econ_cond_5, 1, 2, 3, 4, 5)
	label define econ_cond_3 1 "Worse" 2 "Same" 3 "Better"
	label values econ_cond_3 econ_cond_3
	label variable econ_cond_3 "How are econ conditions this month this year compared to last year, 3 options"

	**worse-than-last-year dummy
	generate econ_cond_worse = (econ_cond_3 == 1) if inlist(econ_cond_3, 1, 2, 3)
	label variable econ_cond_worse "Dummy. Are econ conditions this year worse than last year"

	**better-than-last-year dummy
	generate econ_cond_better = (econ_cond_3 == 3) if inlist(econ_cond_3, 1, 2, 3)
	label variable econ_cond_better "Dummy. Are econ conditions this year better than last year"

/// MAIN OUTCOME MEASURES
**Empty placeholder variable whose dashed label marks the start of the main outcome block
	generate Main_Outcomes = .
	label variable Main_Outcomes "--------------------"

//List Experiment Treatment Indicator
**list_treat marks which list a respondent answered: 0 when the control count
**is a valid non-negative value, 1 when the treatment count is
	generate list_treat = 0 if d15_control >= 0 & d15_control != .
	replace list_treat = 1 if d15_treatment >= 0 & d15_treatment != .
	label define list_treat 0 "Control" 1 "Treatment"
	label values list_treat list_treat
	label variable list_treat "For list exp. 0=in control group, 1=in treatment group"

//d15_control and d15_treatment (List Experiment)
**single outcome variable: the treatment count for treated respondents,
**otherwise the non-negative control count
	generate list_outcome = cond(list_treat == 1, d15_treatment, d15_control) if list_treat == 1 | d15_control >= 0
	label variable list_outcome "Combined List Experiment Counts (integer)"

**check whether survey type made a difference in how people responded
	ttest list_outcome, by(surveytype2)

//Outcome Measure Randomization Indicator
**random_block decided in which order long survey respondents answered the key
**outcome questions; real() stores the string as a number
	generate random_block_n = real(random_block)
	label variable random_block_n "Decided order of outcome questions (only long survey)"

**order_indicator records which of the four question-order versions (op1-op4)
**holds the respondent's answers, detected from a non-missing tc1a item; when
**several versions are non-missing the highest-numbered one wins
	generate order_indicator = .
	forvalues ver = 1/4 {
		replace order_indicator = `ver' if op`ver'_tc1a != .
	}
	label variable order_indicator "Indicator for order of outcome questions (integer)"

//op#_tc1a, b, and c (counts of past 5 days: paid in full, paid partially, did not pay, respectively)
**Clean all 3 subcomponents in each of the 4 question-order versions:
**only counts between 0 and 5 are kept, everything else is left missing
	forvalues i = 1/4 {
		foreach x in a b c {
			gen fee1_`x'_`i' = op`i'_tc1`x' if inrange(op`i'_tc1`x', 0, 5)
			label var fee1_`x'_`i' "Of past 5 days, how many a=paid full fee, b=paid partial fee, c=paid no fee"
		}
	}

**Combine the four versions of each subcomponent into one variable, taking the
**version that matches the respondent's order_indicator

***days out of past five days fee paid in full
	gen fee1_full = .
	forvalues i = 1/4 {
		replace fee1_full = fee1_a_`i' if order_indicator == `i'
	}
	label var fee1_full "Of past 5 days, number of days full fee paid (integer)"

***days out of past five days fee paid partially
	gen fee1_part = .
	forvalues i = 1/4 {
		replace fee1_part = fee1_b_`i' if order_indicator == `i'
	}
	label var fee1_part "Of past 5 days, number of days partial fee paid (integer)"

***days out of past five days no fee paid
	gen fee1_none = .
	forvalues i = 1/4 {
		replace fee1_none = fee1_c_`i' if order_indicator == `i'
	}
	**this label belongs to fee1_none; applying it to fee1_part would overwrite
	**that variable's label and leave fee1_none unlabeled
	label var fee1_none "Of past 5 days, number of days no fee paid (integer)"

**Checking randomization
***summary statistics of each combined fee1 measure, one question order at a time
foreach part in full part none {
	forvalues ord = 1/4 {
		summarize fee1_`part' if order_indicator == `ord'
	}
}

***the same summaries, one survey type at a time
foreach part in full part none {
	forvalues stype = 1/2 {
		summarize fee1_`part' if surveytype2 == `stype'
	}
}

***difference of means in no-fee days between order 1 (mean = .731) and order 2 (mean = .678)
ttest fee1_c_1 == fee1_c_2, unpaired

***difference of means in partial-fee days between order 1 (mean = .357) and order 4 (mean = .257)
ttest fee1_b_1 == fee1_b_4, unpaired

***the same order 1 vs order 4 comparison, restricted to the long survey
ttest fee1_b_1 == fee1_b_4 if surveytype2 == 1, unpaired

***difference of means between short and long survey for each combined measure
foreach part in full part none {
	ttest fee1_`part', by(surveytype2)
}

//op#_tc3a, b, and c (out of a group of 10 tokens, how many represent vendors who paid in full, who paid some amount, and who paid nothing)
**clean each of the three components for each of the four randomization orders:
**only counts between 0 and 10 are kept, everything else is left missing
	forvalues i = 1/4 {
		foreach x in a b c {
			generate fee2_`x'_`i' = op`i'_tc3`x' if inrange(op`i'_tc3`x', 0, 10)
			label variable fee2_`x'_`i' "Out of 10 vendors, how man a=always pay, b=sometimes, c=never. 1:4, random order"
		}
	}

**combine the four versions of each component into one variable, taking the
**version that matches the respondent's order_indicator.
**component letters: a = always pay, b = sometimes pay, c = never pay
	local src_always a
	local src_sometimes b
	local src_never c

	foreach freq in always sometimes never {
		generate fee2_`freq' = .
		forvalues i = 1/4 {
			replace fee2_`freq' = fee2_`src_`freq''_`i' if order_indicator == `i'
		}
		label variable fee2_`freq' "Out of 10 vendors, how many `freq' pay fee(integer)"
	}

**Checking randomization
***summary statistics of each combined fee2 measure, one question order at a time
foreach freq in always sometimes never {
	forvalues ord = 1/4 {
		summarize fee2_`freq' if order_indicator == `ord'
	}
}

***the same summaries, one survey type at a time
foreach freq in always sometimes never {
	forvalues stype = 1/2 {
		summarize fee2_`freq' if surveytype2 == `stype'
	}
}

***difference of means in never-pay counts between order 2 (mean = 1.359) and order 4 (mean = 1.275)
	ttest fee2_c_2 == fee2_c_4, unpaired

***difference of means between survey types for each combined measure
	foreach freq in always sometimes never {
		ttest fee2_`freq', by(surveytype2)
	}

//op#_tc2_29 items (with which statement about paying fees they agree with more, order randomized)
**random_tc2_29 holds the random draw that fixed the order of the two statements;
**real() converts it from string to a number (missing when it cannot be parsed)

	gen statement_order = real(random_tc2_29)
	label var statement_order "Random uniform draw, decides which statement respondents see first"

**indicator built from the draw: below .5 is 0, .5 or above is 1.
**Stata treats missing as larger than any number, so the comparison is guarded
**with !missing(); otherwise respondents without a draw would be coded 1.
	gen statement_order2 = (statement_order >= .5) if !missing(statement_order)
	label define statement_order2 0 "Vendors should always pay, first" 1 "Vendors should only pay when agree, first"
	label values statement_order2 statement_order2
	label var statement_order2 "0:vendors should always pay, first; 1:vendors only when agree, first"

**build the chosen statement as text for each question order (1-4) and each
**statement order (a, b); text is used so the versions can be merged below.
**In version a answer 1 is the always-pay statement and answer 2 the
**only-if-agree statement; in version b the two are swapped.
	local txt_always "Vendors should always pay tax even if they disagree with local government"
	local txt_agree "Vendors should only pay tax if they agree with local government"

	forvalues i = 1/4 {
		generate tax_morale_a_`i' = "`txt_always'" if op`i'_tc2_29_a == 1
		replace tax_morale_a_`i' = "`txt_agree'" if op`i'_tc2_29_a == 2
		label variable tax_morale_a_`i' "Agree with vendors always pay, or only when agree with loc gov"

		generate tax_morale_b_`i' = "`txt_agree'" if op`i'_tc2_29_b == 1
		replace tax_morale_b_`i' = "`txt_always'" if op`i'_tc2_29_b == 2
		label variable tax_morale_b_`i' "Agree with vendors always pay, or only when agree with loc gov"
	}

**merge the eight text versions into one variable; if more than one version is
**filled in for a respondent, the last one in loop order is kept
	generate temp_morale = ""

	forvalues i = 1/4 {
		foreach x in a b {
			replace temp_morale = tax_morale_`x'_`i' if tax_morale_`x'_`i' != ""
		}
	}

	**encode numbers the distinct texts in alphabetical order
	encode temp_morale, generate(tax_morale)
	**flip and shift the codes (1 becomes 1, 2 becomes 0) so that the always-pay answer is 1
	replace tax_morale = abs(tax_morale - 3) - 1
	label define tax_morale 0 "Vendors should only pay tax if they agree with local government" 1 "Vendors should always pay even if they disagree with local government", replace
	label values tax_morale tax_morale
	label variable tax_morale "Agree with vendors always pay (1), or only when agree with loc gov (0)"


	drop temp_morale //the text helper is no longer needed


***check randomization: mean of tax_morale by question order, long survey only
	forvalues ord = 1/4 {
		summarize tax_morale if order_indicator == `ord' & surveytype2 == 1
	}

	ttest tax_morale, by(statement_order2) //the statement order does matter
	ttest tax_morale, by(surveytype2) //difference in responses between surveys

	tabulate tax_morale statement_order2 //crosstab
	**percentages within each statement order group
	forvalues grp = 0/1 {
		tabulate tax_morale if statement_order2 == `grp'
	}

	**percentages within each survey type group
	forvalues stype = 1/2 {
		tabulate tax_morale if surveytype2 == `stype'
	}

//op#_tc2_30 (whether respondent agrees strongly or very strongly with the statement chosen for tc2_29)
**merge the four question-order versions into one variable, keeping non-negative values only
	generate tax_morale_vstrong = .

	forvalues ver = 1/4 {
		replace tax_morale_vstrong = op`ver'_tc2_30 if op`ver'_tc2_30 >= 0 & op`ver'_tc2_30 != .
	}

	**shift down by one so that 0 = strong agreement and 1 = very strong agreement
	replace tax_morale_vstrong = tax_morale_vstrong - 1
	label define tax_morale_vstrong 0 "Strongly" 1 "Very Strongly"
	label values tax_morale_vstrong tax_morale_vstrong
	label variable tax_morale_vstrong "How strongly agree with chosen tax_morale statement. 0=strongly, 1=very strongly"

***check randomization: mean by question order, long survey only
	forvalues ord = 1/4 {
		summarize tax_morale_vstrong if order_indicator == `ord' & surveytype2 == 1
	}

	ttest tax_morale_vstrong, by(statement_order2) //the statement order does not seem to matter
	ttest tax_morale_vstrong, by(surveytype2) //difference in responses between surveys

//op#_tc2_alt5a to alt5e (fee collector questions) - long survey
**Each item exists in four versions, one per question order (op1-op4). Every new
**variable starts missing and takes the value of whichever version holds a
**non-negative, non-missing answer (a later version overrides an earlier one).
**  see_fcoll      from alt5a: of past five days, how many saw fee collector
**  interact_fcoll from alt5b: of past five days, how many interacted with fee collector
**  pay_fcoll      from alt5c: of days saw fee collector, how many days paid fee
**  pay_no_fc_d    from alt5d: if fee collector did not visit, still able to pay fee
**  pay_no_fc      from alt5e: on days without fee collector, how many days able to pay
	local src_see_fcoll alt5a
	local src_interact_fcoll alt5b
	local src_pay_fcoll alt5c
	local src_pay_no_fc_d alt5d
	local src_pay_no_fc alt5e

	foreach v in see_fcoll interact_fcoll pay_fcoll pay_no_fc_d pay_no_fc {
		local raw tc2_`src_`v''
		gen `v' = .
		forvalues i = 1/4 {
			replace `v' = op`i'_`raw' if op`i'_`raw' != . & op`i'_`raw' >= 0
		}
	}

	label var see_fcoll "In past five days, how many see fee collector"

	label var interact_fcoll "In past five days, how many interact with fee collector"

	label var pay_fcoll "In see_fcoll days, how many pay fee collector"
	**cross tab this with see_fcoll

	label def pay_no_fc_d 1 "Able to Pay" 0 "Not Able to Pay"
	label values pay_no_fc_d pay_no_fc_d
	label var pay_no_fc_d "On days no fee collector, were you still able to pay fee"

	**label text now has its closing parenthesis
	label var pay_no_fc "On days no fee collector, how many were you able to pay (when pay_no_fc_d = 1)"

//op#_e9_e (how much the respondent paid the last time she paid market fees) - long survey
**merge the four question-order versions, keeping non-negative, non-missing values
	gen pay_amt = .
	forvalues i = 1/4 {
		replace pay_amt = op`i'_e9_e if op`i'_e9_e != . & op`i'_e9_e >= 0
	}
	**reuse the value label attached to the op3 version
	label values pay_amt op3_e9_e
	label var pay_amt "How much paid, last time paid market fee (labeled integer)"

**merge the four versions of the companion other-amount item the same way
	gen pay_amt_other = .
	forvalues i = 1/4 {
		replace pay_amt_other = op`i'_e9_e_other if op`i'_e9_e_other != . & op`i'_e9_e_other >= 0
	}
	**this variable carries no value label and feeds the continuous pay_amt2,
	**so its label no longer repeats the labeled-integer wording of pay_amt
	label var pay_amt_other "How much paid, last time paid market fee (other amount, integer)"

**pay_amt2 is a continuous amount: the other-amount answer, overridden by the
**amount that the categorical codes stand for (code 1 = 50, 2 = 100, 3 = 200)
	gen pay_amt2 = pay_amt_other
	replace pay_amt2 = 50 if pay_amt == 1
	replace pay_amt2 = 100 if pay_amt == 2
	replace pay_amt2 = 200 if pay_amt == 3
	label var pay_amt2 "How much paid, last time paid fee (comb w/ pay_amt, integer)"

//op#_e9_eunit (the time unit for op#_e9_e) - long survey
	gen pay_amt_unit = .
	forvalues i = 1/4 {
		replace pay_amt_unit = op`i'_e9_eunit if op`i'_e9_eunit != . & op`i'_e9_eunit >= 0
	}
	label values pay_amt_unit op3_e9_eunit
	**the unit label goes on pay_amt_unit; putting it on pay_amt would overwrite
	**that variable's own label
	label var pay_amt_unit "Unit of time for pay_amt (labeled integer)"

//tc4 - Long Survey
	**copy tc4 with negative codes left missing
	clonevar tc4_clean = tc4 if tc4 >= 0
	**recenter the codes on zero: 3 becomes 0 (about the same), 2 becomes -1 (lower), 1 stays 1 (higher)
	replace tc4_clean = cond(tc4_clean == 3, 0, -1) if inlist(tc4_clean, 2, 3)
	label define tc4_clean -1 "Lower" 0 "About the same" 1 "Higher"
	label values tc4_clean tc4_clean

//tc5a, tc5b - Long Survey
	**both items are reversed so that 1 = strongly disagree and 4 = strongly agree;
	**they share one value label
	label define agreement 1 "Strongly Disagree" 2 "Somewhat Disagree" 3 "Somewhat Agree" 4 "Strongly Agree"

	generate tc5a_clean = abs(tc5a - 5) if tc5a >= 0
	label values tc5a_clean agreement
	label variable tc5a_clean "If I wanted, I could refuse to pay market fee"

	generate tc5b_clean = abs(tc5b - 5) if tc5b >= 0
	label values tc5b_clean agreement
	label variable tc5b_clean "If all vendors work together, could stop paying fees"


/// RECEIPT QUESTIONS
	generate Receipts = .
	label variable Receipts "----------------------"

//tc7 (receipt)
	**copy tc7, then blank out negative codes
	generate receipt = tc7
	replace receipt = . if tc7 < 0
	label values receipt tc7
	label variable receipt "If the respondent received a receipt last time paid fee"

//tc2 (receipt check)
	**same treatment for tc2
	generate receipt_check = tc2
	replace receipt_check = . if tc2 < 0
	label values receipt_check tc2
	label variable receipt_check "Was respondent able to produce receipt (1), or not (0)"

//tc2_datenone (does receipt have a date)
	**tc2_datenone is flipped (0 becomes 1, 1 becomes 0) so that 1 means the receipt HAS a date
	label define receipt_has_date 0 "Receipt does not have date" 1 "Receipt has date"
	generate receipt_has_date = abs(1 - tc2_datenone) if tc2_datenone >= 0
	label values receipt_has_date receipt_has_date
	label variable receipt_has_date "Does the receipt have a date (1), or not (0)"

//tc2_date (what is the date on the receipt)
	clonevar receipt_date = tc2_date
	label var receipt_date "The date on the receipt shown to enumerator"

//tc2a (is this official government receipt)
	**negative codes are left missing
	clonevar receipt_official = tc2a if tc2a >= 0
	label var receipt_official "Whether the receipt by enumerator from dist gov (1) or not (0)"

//recent_receipt_* (was the receipt dated within a given number of days before `today')
	**assumes today and receipt_date are numeric dates on the same scale -- TODO confirm
	**a missing receipt_date gives a missing difference, which Stata treats as larger than
	**any number, so respondents without a receipt date are coded 0 here, not missing

	***7 days
	gen recent_receipt_7 = 1 if today - receipt_date <= 7 //can change this value (and in line below), if window should be expanded.
	replace recent_receipt_7 = 0 if recent_receipt_7 != 1
	label var recent_receipt_7 "Was receipt shown to enumerator from the past seven days?"


	***10 days
	gen recent_receipt_10 = 1 if today - receipt_date <= 10 //can change this value (and in line below), if window should be expanded.
	replace recent_receipt_10 = 0 if recent_receipt_10 != 1
	**FIX: the label used to say "seven days", copied from recent_receipt_7
	label var recent_receipt_10 "Was receipt shown to enumerator from the past ten days?"



/// OTHER OUTCOMES
	generate Other_Outcomes = .
	label variable Other_Outcomes "--------------------"

//p2 (contact local government official or government institution about a problem)
	**keep non-negative answers plus the -99 (Don't Know) code; other negative codes become missing
	clonevar contact_gov = p2 if p2 == -99 | p2 >= 0
	**Don't Knows are counted together with those who did not contact an official
	replace contact_gov = 0 if p2 == -99
	label variable contact_gov "If contacted government (1) or not (0)"

//p6 (how satisfied with local government)
//ms10 (how satisfied with developements in this market provided by district government)
	**one satisfaction scale serves both items
	label define sat_gov 1 "Very Dissatisfied" 2 "Somewhat Dissatisfied" 3 "Somewhat Satisfied" 4 "Very Satisfied"

	**both items are mirrored around 5 so that a higher number means more satisfied
	generate satisfaction_gov = abs(5 - p6) if p6 >= 0
	generate satisfaction_dev = abs(5 - ms10) if ms10 >= 0
	label values satisfaction_gov satisfaction_dev sat_gov

	label variable satisfaction_gov "Of district government. 1=very dissatisfied, 4=very satisfied"
	label variable satisfaction_dev "Of developments in market provided by gov. 1=very dissatisfied, 4=very satisfied"

//// Market committee questions	- Long Survey
//s5 - Long Survey
	**copy s5, then blank out negative codes
	clonevar s5_clean = s5
	replace s5_clean = . if s5 < 0

//s6, s7 - Long Survey
	**0/1 label, reused by the yes/no dummies further down the file
	label define yes_no 0 "No" 1 "Yes"

	foreach q in s6 s7 {
		**only strictly positive codes are kept
		clonevar `q'_clean = `q' if `q' > 0
		**make No = 0 (it is coded 2 in the raw variable), so it makes more sense
		recode `q'_clean (2 = 0)
		label values `q'_clean yes_no
	}

//s8 (4 separate variables, because s8 allows for multiple choices) - Long Survey
	**each option dummy is copied as is (no filtering of codes)
	forvalues k = 1/4 {
		gen s8_`k'_clean = s8_`k'
		label values s8_`k'_clean yes_no
	}

	label var s8_1_clean "Is role of mkt comm. to coordinate btwn vendors and mkt mgmt?"
	label var s8_2_clean "Is role of mkt comm. to coordinate btwn vendors and local gov?"
	label var s8_3_clean "Is role of mkt comm. to solve disputes among vendors?"
	label var s8_4_clean "Is role of mkt comm. to help vendors when they have financial problems?"

	////Would be interesting to take a look at s8__77?
//// End market committee questions

////Knowledge of legal stuff questions (tc2*) - Long Survey
//tc2_* - Long Survey
	**tc2_1 to tc2_3: straight copies with negative codes left missing
	forvalues k = 1/3 {
		local raw tc2_`k'
		clonevar `raw'_clean = `raw' if `raw' >= 0
	}

//tc2_4*
	**agreement items, mirrored around 5 (1<->4, 2<->3) and given the agreement value label
	foreach part in a b c d {
		local raw tc2_4`part'
		gen `raw'_clean = abs(5 - `raw') if `raw' >= 0
		label values `raw'_clean agreement
	}

	label var tc2_4a_clean "For developments in community provided by district gov, citizens must pay taxes"
	label var tc2_4b_clean "Paying taxes is duty of all citizens, even when don't approve of how off spend money"
	label var tc2_4c_clean "Fees from markets like this go to dev in markets from dist gov"
	label var tc2_4d_clean "Fees from markets like this go to dev in comm. provided by dist gov"

//tc2_7*
	**tc2_7 and tc2_7b: copies with negative codes left missing
	foreach raw in tc2_7 tc2_7b {
		clonevar `raw'_clean = `raw' if `raw' >= 0
	}

//tc2_8*
	**agreement items, mirrored around 5
	foreach part in a b {
		local raw tc2_8`part'
		gen `raw'_clean = abs(5 - `raw') if `raw' >= 0
		label values `raw'_clean agreement
	}

	label var tc2_8a_clean "Fee collectors in mkt are polite and treat vendors well"
	label var tc2_8b_clean "Fee collectors in mkt are diligent in transferring full amt to dist gov"

//tc2_9
	**five-point item, mirrored around 6 (1<->5, 2<->4, 3 stays)
	label define high_low 1 "Much too low" 2 "A bit too low" 3 "About right" 4 "A bit too high" 5 "Much too high"
	gen tc2_9_clean = abs(6 - tc2_9) if tc2_9 >= 0
	label values tc2_9_clean high_low
	label var tc2_9_clean "Proportion of mkt fees spent on market upkeep vs developments in comm. by dist gov is"

//tc2_10
	**there are 3 values greater than 1000, which isn't possible; they are conservatively set to missing,
	**as are negative codes
	clonevar tc2_10_clean = tc2_10 if inrange(tc2_10, 0, 1000)

//tc2_11_*
	**five yes/no items; negative codes are left missing
	forvalues k = 1/5 {
		local raw tc2_11_`k'
		gen `raw'_clean = `raw' if `raw' >= 0
		label values `raw'_clean yes_no
	}

	label var tc2_11_1_clean "Do you think dist spends fee money on paying salaries for dist officials?"
	label var tc2_11_2_clean "Do you think dist spends fee money on education?"
	label var tc2_11_3_clean "Do you think dist spends fee money on health care?"
	label var tc2_11_4_clean "Do you think dist spends fee money on roads and infrastructure?"
	label var tc2_11_5_clean "Do you think dist officials use fee money for personal use?"

//tc2_14_*
	**nine yes/no items; negative codes are left missing
	forvalues k = 1/9 {
		local raw tc2_14_`k'
		gen `raw'_clean = `raw' if `raw' >= 0
		label values `raw'_clean yes_no
	}

	label var tc2_14_1_clean "In past 6 mon has fee coll talked to you about why should pay fees"
	label var tc2_14_2_clean "In past 6 mon has some from mkt comm. talked to you about why should pay fees"
	label var tc2_14_3_clean "In past 6 mon has a fee coll or mkt employee threatened to confiscate goods"
	label var tc2_14_4_clean "In past 6 mon has a fee coll actually confiscated some goods"
	label var tc2_14_5_clean "In past 6 mon have you been kicked out of market"
	label var tc2_14_6_clean "In past 6 mon has a fee coll threatened you with violence"
	label var tc2_14_7_clean "In past 6 mon has a fee coll used violence against you"
	label var tc2_14_8_clean "Respondent always pays fee"
	label var tc2_14_9_clean "Respondent says nothing has happened"

//tc2_14b
	**same treatment for items 1-7 and 9 of tc2_14b (item 8 is not processed here)
	foreach k of numlist 1/7 9 {
		local raw tc2_14b_`k'
		gen `raw'_clean = `raw' if `raw' >= 0
		label values `raw'_clean yes_no
	}

	label var tc2_14b_1_clean "In past 3 mon has fee coll talked to vendor about why should pay fees"
	label var tc2_14b_2_clean "In past 3 mon has some from mkt comm. talked to vendor about why should pay fees"
	label var tc2_14b_3_clean "In past 3 mon has a fee coll or mkt employee threatened to confiscate goods"
	label var tc2_14b_4_clean "In past 3 mon has a fee coll actually confiscated some goods"
	label var tc2_14b_5_clean "In past 3 mon has vendor been kicked out of market"
	label var tc2_14b_6_clean "In past 3 mon has a fee coll threatened vendor with violence"
	label var tc2_14b_7_clean "In past 3 mon has a fee coll used violence against vendor"
	label var tc2_14b_9_clean "Respondent says nothing has happened"

//tc2_15*
	**agreement items, mirrored around 5 and given the agreement value label
	foreach part in a b c {
		local raw tc2_15`part'
		gen `raw'_clean = abs(5 - `raw') if `raw' >= 0
		label values `raw'_clean agreement
	}

	label var tc2_15a_clean "I pay market fees because it's the right thing to do"
	label var tc2_15b_clean "I pay market fees because I'll get in trouble if I don't"
	label var tc2_15c_clean "I py mkt fees b/c I get developments in comm. provided by dist gov"

	**note the spelling of the new variable: tc2_15_impct_clean (source is tc2_15_impact)
	clonevar tc2_15_impct_clean = tc2_15_impact if tc2_15_impact >= 0

//running through rest of tc2_16-28 (note for analysis tc2_26-28 would have to be reversed
	**copies with negative codes left missing; tc2_19, tc2_20 and tc2_22 are not in the list
	foreach k of numlist 16/18 21 23/28 {
		local raw tc2_`k'
		clonevar `raw'_clean = `raw' if `raw' >= 0
	}
////End knowledge of legal stuff questions

////Market services questions - Long Survey
//ms1-6
	**six items mirrored around 5 and given the sat_gov (satisfaction) value label;
	**negative codes are left missing
	forvalues k = 1/6 {
		local raw ms`k'
		gen `raw'_clean = abs(5 - `raw') if `raw' >= 0
		label values `raw'_clean sat_gov
	}

	label var ms1_clean "Access to clean water"
	label var ms2_clean "Toilets"
	label var ms3_clean "Garbage Collection"
	label var ms4_clean "Condition of the Pathways"
	label var ms5_clean "Condition of the Stalls"
	label var ms6_clean "Security"

//ms9 (ms8 is clean as is)
	**copy ms9, then blank out negative codes
	clonevar ms9_clean = ms9
	replace ms9_clean = . if ms9 < 0
////End market services

////Spillover questions - Long Survey
//sp5 is clean

//sp7_*
	**one dummy per answer option (sp7 allows multiple choices); values are copied as is
	**and given the yes_no value label
	foreach i of num 1/6 {
		gen sp7_`i'_clean = sp7_`i'
		label values sp7_`i'_clean yes_no
	}

	label var sp7_1_clean "Sell in other markets b/c they are close together"
	label var sp7_2_clean "Sell in other markets b/c they are open on diff days"
	label var sp7_3_clean "Sell in other markets b/c they are in good condition/good dev from dist gov"
	label var sp7_4_clean "Sell in other markets to have more customers"
	label var sp7_5_clean "Sell in other markets b/c of way market is managed"
	**FIX: label read "See in other markets"; brought in line with the five labels above
	label var sp7_6_clean "Sell in other markets b/c fees are low"
////End spillover questions

////Exposure questions - Long Survey
//te1, 2, 4, 5, 6, 8, 8_funded, 13, 15, 16, 17, 18, 19, 20, 21 (all one answer)
	**copies with negative codes left missing
	**NOTE(review): te9 is not in this list, so no te9_clean is created -- confirm that is intended
	local single_answer 1 2 4 5 6 8 8_funded 13 15 16 17 18 19 20 21
	foreach q of local single_answer {
		clonevar te`q'_clean = te`q' if te`q' >= 0
	}

**Next few are all variables that allowed multiple answer choices
**(each option dummy is copied as is and given the yes_no value label)

//te3
	forvalues k = 1/6 {
		gen te3_`k'_clean = te3_`k'
		label values te3_`k'_clean yes_no
	}

	label var te3_1_clean "At meeting discussed developments in market provided by dist gov"
	label var te3_2_clean "At meeting discussed need for paying market fees"
	label var te3_3_clean "At meeting discussed corruption in fee collection"
	label var te3_4_clean "At meeting discussed how market fees will be used in the future"
	label var te3_5_clean "At meeting discussed vendors who sell outside of mkt/who can sell in mkt"
	label var te3_6_clean "At meeting discussed overseeing market elections"

//te3_ms
	forvalues k = 1/6 {
		gen te3_ms_`k'_clean = te3_ms_`k'
		label values te3_ms_`k'_clean yes_no
	}

	label var te3_ms_1_clean "Development prov by dist gov discussed: toilets"
	label var te3_ms_2_clean "Development prov by dist gov discussed: water taps"
	label var te3_ms_3_clean "Development prov by dist gov discussed: security/lights"
	label var te3_ms_4_clean "Development prov by dist gov discussed: roofs/stalls"
	label var te3_ms_5_clean "Development prov by dist gov discussed: paths"
	label var te3_ms_6_clean "Development prov by dist gov discussed: trash skip"

//te7
	**option dummies copied as is and given the yes_no value label
	forvalues k = 1/6 {
		gen te7_`k'_clean = te7_`k'
		label values te7_`k'_clean yes_no
	}

	label var te7_1_clean "Toilets improved in last year"
	label var te7_2_clean "Water taps improved in last year"
	label var te7_3_clean "Security/lights improved in last year"
	label var te7_4_clean "Roofs/stalls improved in last year"
	label var te7_5_clean "Paths improved in last year"
	label var te7_6_clean "Trash skip improved in last year"

//te14
	forvalues k = 1/5 {
		gen te14_`k'_clean = te14_`k'
		label values te14_`k'_clean yes_no
	}

	label var te14_1_clean "Vendors learn about $ dist collects from mkt by SMS system"
	label var te14_2_clean "Vendors learn about $ dist collects from mkt by posters/other mats in mkt"
	label var te14_3_clean "Vendors learn about $ dist collects from mkt by someone coming to mkt to tell"
	label var te14_4_clean "Vendors learn about $ dist collects from mkt through mkt comm."
	label var te14_5_clean "Vendors learn about $ dist collects from mkt by asking fee coll or mkt manager"

//te18_add
	forvalues k = 1/7 {
		gen te18_add_`k'_clean = te18_add_`k'
		label values te18_add_`k'_clean yes_no
	}

	label var te18_add_1_clean "To report improp behav by fee coll would contact market manager"
	label var te18_add_2_clean "To report improp behav by fee coll would contact zone manager/rev coll"
	label var te18_add_3_clean "To report improp behav by fee coll would contact district councilor"
	label var te18_add_4_clean "To report improp behav by fee coll would contact district revenue officer"
	label var te18_add_5_clean "To report improp behav by fee coll would report via SMS"
	label var te18_add_6_clean "To report improp behav by fee coll would contact police"
	label var te18_add_7_clean "To report improp behav by fee coll would contact village leader/chief"
////End exposure questions

////Political questions (part 2) - Long Survey
//p9
	**copy p9, then blank out negative codes
	clonevar p9_clean = p9
	replace p9_clean = . if p9 < 0

**voted dummy: takes the cleaned p9 value only where raw p9 is at most 1;
**everything else (including codes above 1) is left missing
	gen voted = p9_clean if p9 <= 1
	label values voted yes_no
	label var voted "Did you vote in last presidential elections"

//p10a-d
	**four items copied under descriptive names, with negative codes left missing
	clonevar pay_income_tax = p10a if p10a >= 0
	clonevar pay_land_busi_tax = p10b if p10b >= 0
	clonevar pay_busi_licen = p10c if p10c >= 0
	clonevar pay_VAT_sell = p10d if p10d >= 0
////End political questions

////Trust Questions - Long Survey
	**four-point trust scale used after the items below are reversed
	label define trust_new 1 "Not at all trustworthy" 2 "Not very trustworthy" 3 "Somewhat trustworthy" 4 "Very trustworthy"

//tr1, 2, tr10, tr11 (how trustworthy questions)
	**mirrored around 5 (1<->4, 2<->3) to line up with the trust_new labels; negative codes are left missing
	foreach i of num 1/2 10/11 {
		gen tr`i'_clean = abs(5 - tr`i') if tr`i' >= 0
		label values tr`i'_clean trust_new
	}

	label var tr1_clean "Is district government trustworthy?"
	label var tr2_clean "Is ward councilor for this market trustworthy?"
	label var tr10_clean "Are people in this market trustworthy?"
	**FIX: spelling of "Malawians" (was "Malawins")
	label var tr11_clean "Are Malawians in general trustworthy?"

//tr3, tr5	(how often questions)
	**copied without reversing; negative codes are left missing
	foreach i of num 3 5 {
		clonevar tr`i'_clean = tr`i' if tr`i' >= 0
	}

//tr9a-f (agreeing with series of statements)
	**mirrored around 5 and given the agreement value label; negative codes are left missing
	foreach part in a b c d e f {
		local raw tr9`part'
		gen `raw'_clean = abs(5 - `raw') if `raw' >= 0
		label values `raw'_clean agreement
	}

	label var tr9a_clean "Agree with how dist gov manages conveying plans to citizens in area"
	label var tr9b_clean "Agree with how dist gov manages devs in comm. prov by dist gov"
	label var tr9c_clean "Agree with how dist gov manages collecting rev fairly from citizens"
	label var tr9d_clean "Agree with how dist gov manages transparency in reporting activity to citizens"
	label var tr9e_clean "Agree with how dist gov manages managing public funds eff on behalf of citizens"
	label var tr9f_clean "Agree with how dist gov manages including citizens of area in planning"

//tr4, tr6 (select multiple answers)
	**ten option dummies per question, copied as is and given the yes_no value label
	**NOTE(review): option 4 gets a _clean variable but no variable label below -- confirm that is intended
	foreach q in 4 6 {
		forvalues opt = 1/10 {
			gen tr`q'_`opt'_clean = tr`q'_`opt'
			label values tr`q'_`opt'_clean yes_no
		}
	}

	**tr4 (contact ward councilor)
	label var tr4_1_clean "Contacted ward councilor by letter"
	label var tr4_2_clean "Contacted ward councilor by phone call"
	label var tr4_3_clean "Contacted ward councilor by email"
	label var tr4_5_clean "Contacted ward councilor by SMS"
	label var tr4_6_clean "Contacted ward councilor through the chief"
	label var tr4_7_clean "Contacted ward councilor through an elected official"
	label var tr4_8_clean "Contacted ward councilor through non-elected official"
	label var tr4_9_clean "Contacted ward councilor by going to his/her office"
	label var tr4_10_clean "Contacted ward councilor through market committee"

	**tr6 (contacted other district official)
	label var tr6_1_clean "Contacted other dist official by letter"
	label var tr6_2_clean "Contacted other dist official by phone call"
	label var tr6_3_clean "Contacted other dist official by email"
	label var tr6_5_clean "Contacted other dist official by SMS"
	label var tr6_6_clean "Contacted other dist official through the chief"
	label var tr6_7_clean "Contacted other dist official through an elected official"
	label var tr6_8_clean "Contacted other dist official through non-elected official"
	label var tr6_9_clean "Contacted other dist official by going to his/her office"
	label var tr6_10_clean "Contacted other dist official through market committee"

//tr7a,b and tr8 (where order was randomized; takes these randomizations and combines information into one variable)
	**random_tr7 is stored as a string; real() turns it into a number

	gen random_tr7_real = real(random_tr7)
	label var random_tr7_real "Random uniform draw, decides which statement respondents see first"

	**indicator for the draw: 0 below .5, 1 at or above .5; left missing when the draw is missing
	label define random_tr7_ind 0 "More important to have dist gov that can get things done, first" 1 "More important to hold dist gov accountable, first"
	gen random_tr7_ind = (random_tr7_real >= .5) if random_tr7_real != .
	label values random_tr7_ind random_tr7_ind
	label var random_tr7_ind "0:strong gov, even if no influence, first; 1:accountable gov, even if slow, first"

	**tr7 was asked in two versions that differ only in the order of the two statements:
	**in tr7a answer 1 is the "get things done" statement and answer 2 the "accountable" one,
	**in tr7b it is the other way round. Each answer is first translated into the statement
	**text (a string), as this is necessary for combining the two versions later
		foreach x in a b {

			if "`x'" == "a" {
				gen tr7`x'_clean = "It is more important to have a district government that can get things done, even if we have no influence over what it does." if tr7`x' == 1
				replace tr7`x'_clean = "It is more important for citizens to be able to hold district government accountable, even if that means it makes decisions more slowly." if tr7`x' == 2
			}
			else if "`x'" == "b" {
				gen tr7`x'_clean = "It is more important for citizens to be able to hold district government accountable, even if that means it makes decisions more slowly." if tr7`x' == 1
				replace tr7`x'_clean = "It is more important to have a district government that can get things done, even if we have no influence over what it does." if tr7`x' == 2
			}

			label var tr7`x'_clean "Strong gov is important, or holding gov accountable is important"
		}

	**this code combines two statement order versions of each variable into one variable
	**(if both versions were non-empty for a respondent, the tr7b text would win)
	gen temp_tr7 = ""

		foreach x in a b {
			replace temp_tr7 = tr7`x'_clean if tr7`x'_clean != ""
		}

	**encode numbers the distinct strings in alphabetical order, so (provided both statements
	**occur in the data) 1 = "...for citizens to be able to hold..." (accountable) and
	**2 = "...to have a district government that can get things done..."
	encode temp_tr7, generate (tr7_clean) //to make this variable into indicator
	**switching values around so that higher number means "higher" accountability then making it 0 1:
	**abs(2 - x) maps 1 (accountable) to 1 and 2 (get things done) to 0
	replace tr7_clean = abs(2 - tr7_clean)
	**FIX: the value label had the two texts attached to the wrong codes (accountable on 0,
	**get things done on 1), contradicting the recode above and the variable label below;
	**a stray leading space in one of the texts is removed as well
	label def tr7_clean 0 "It is more important to have a district government that can get things done, even if we have no influence over what it does." 1 "It is more important for citizens to be able to hold district government accountable, even if that means it makes decisions more slowly", replace
	label values tr7_clean tr7_clean
	label var tr7_clean "Agree with accountability more important (1), or strong, efficient gov (0)"

	drop temp_tr7 //dropping unnecessary temp_tr7

	***check statement order
	ttest tr7_clean, by(random_tr7_ind)

//tr8
	**copy tr8, leaving negative codes as missing
	label define tr8_clean 1 "strongly" 2 "very strongly"
	gen tr8_clean = tr8 if tr8 >= 0
	label values tr8_clean tr8_clean
	label var tr8_clean "How strongly agree with chosen tr7_clean statement. 1=strongly, 2=very strongly"

/// FINAL REORDER OF VARIABLES
	*order surveytype2 sup enum district market name_first name_last Demographics gender home_district current_district tribe age age_range education education2 reading_language literacy household_income household_income_top household_income_trim sell_freq stall_type stall_type2 profit profit_top profit_trim profit_monthly Main_Outcomes list_treated list_outcome order_indicator fee1_full fee1_part fee1_none fee2_always fee2_sometimes fee2_never statement_order2 tax_morale tax_morale_strength Receipts receipt receipt_check receipt_check_date receipt_date receipt_official Other_Outcomes contact_gov satisfaction_gov satisfaction_dev

**save file as new version
**the replace option lets this do-file be re-run: without it, save stops with an error
**as soon as the output file already exists. NOTE: any existing copy is overwritten.
	save "data\1_raw\marketvendor_BASELINEFINAL_noID_clean_v5.dta", replace
